@InProceedings{LopesOlivAlmeArau:2009:SpFrBa,
author = "Lopes, Ana Paula Brand{\~a}o and Oliveira, Rodrigo Silva and
Almeida, Jussara Marques de and Ara{\'u}jo, Arnaldo de
Albuquerque",
affiliation = "{Federal University of Minas Gerais/State University of Santa
Cruz} and {Federal University of Minas Gerais} and {Federal
University of Minas Gerais} and {Federal University of Minas
Gerais}",
title = "Spatio-Temporal Frames in a Bag-of-Visual-Features Approach for
                 Human Actions Recognition",
booktitle = "Proceedings...",
year = "2009",
editor = "Nonato, Luis Gustavo and Scharcanski, Jacob",
organization = "Brazilian Symposium on Computer Graphics and Image Processing, 22.
(SIBGRAPI)",
publisher = "IEEE Computer Society",
address = "Los Alamitos",
keywords = "Human Actions, Bag-of-Visual-Features, Video Classification",
abstract = "The recognition of human actions from videos has several
                 interesting and important applications, and a vast number of
                 different approaches have been proposed for this task in
                 different settings. Such approaches can be broadly categorized
                 into model-based and model-free. Typically, model-based
                 approaches work only in very constrained settings, and because
                 of that, a number of model-free approaches have appeared in
                 recent years. Among them, those based on bag-of-visual-features
                 (BoVF) have proven to be the most consistently successful,
                 being used by several independent authors. For videos to be
                 represented by BoVFs, though, an important issue that arises
                 is how to represent dynamic information. Most existing
                 proposals consider the video as a spatio-temporal volume and
                 then describe volumetric patches around 3D interest points. In
                 this work, we propose to build a BoVF representation for
                 videos by collecting 2D interest points directly. The basic
                 idea is to gather such points not only from the traditional
                 frames (xy planes), but also from the planes along the time
                 axis, which we call spatio-temporal frames. Our assumption is
                 that such features are able to capture dynamic information
                 from the videos and are therefore well suited to recognizing
                 human actions, without the need for 3D extensions of the
                 descriptors. In our experiments, this approach achieved
                 state-of-the-art recognition rates on a well-known human
                 actions database, even when compared to more sophisticated
                 schemes.",
conference-location = "Rio de Janeiro, RJ, Brazil",
conference-year = "11-14 Oct. 2009",
doi = "10.1109/SIBGRAPI.2009.17",
language = "en",
ibi = "8JMKD3MGPBW4/35THUHP",
url = "http://urlib.net/ibi/8JMKD3MGPBW4/35THUHP",
targetfile = "sibgrapi-actions-2009-FINAL-5-no-bookmarks.pdf",
urlaccessdate = "2024, Apr. 28"
}